#爬取京东匿名购物车内商品信息
#需要提供浏览器获取的cookie

import requests,json
from pyquery import PyQuery as pq
from requests.exceptions import RequestException

def getPage(url, timeout=10):
	'''Download a page using browser-like headers and return its HTML text.

	Args:
		url: Address of the page to fetch.
		timeout: Seconds to wait for the server before giving up. requests
			applies no timeout by default, so without it a stalled
			connection would block this call indefinitely.

	Returns:
		The response body as text when the server answers with HTTP 200.
		None for any other status code, or when the request fails with a
		RequestException (connection error, timeout, ...).
	'''
	# Header values copied from a real browser session; the cookie is the one
	# the browser obtained for the cart being scraped.
	headers = {
		'referer': 'https://www.jd.com/',
		'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
		'cookie': 'ipLoc-djd=1-72-4137-0; areaId=1; __jdv=122270672|direct|-|none|-|1539521424940; __jdc=122270672; PCSYCityID=2; shshshfpa=3e15e503-2301-957c-0f41-cc7ad493d3d6-1539615593; shshshfpb=0bce0429c771c45c77c231c20f4e1ebbeada2fb96196adcfa5bc4ab6be; user-key=ef3ce5cd-f381-46a9-bb36-f10514226529; cd=0; shshshfp=9ce93d6d9c8e00e189f84ffa4fece377; __jdu=15396219419381982080203; cart-main=xx; cn=2; __jda=122270672.15396219419381982080203.1539621942.1539621942.1539624247.2; shshshsID=d4d05c5e5eda1741c658e52fa5b97cf3_9_1539625193672; 3AB9D23F7A4B3C9B=OHQV6UILZEQS7VGGH2DT5CZ2SAAKESU23YOQGOYGCG3YPJ7XH75IR7DDKQLEPJIQ26M36VOVCGDF6FN2ZTXF5GF2BM; __jdb=122270672.12.15396219419381982080203|2.1539624247; wlfstk_smdl=bful97qlr3104jq8ng3uqsj81qitpjf1',
	}
	try:
		# The context manager closes the session (and its pooled connections)
		# on exit; the body is already fully read because stream is not set.
		with requests.session() as s:
			res = s.get(url=url, headers=headers, timeout=timeout)
	except RequestException:
		return None
	if res.status_code != 200:
		return None
	print("网页爬取成功")
	return res.text


def parsePage(content):
	'''Parse the cart page HTML and yield one dict per cart item.

	Args:
		content: HTML text of the cart page.

	Yields:
		One dict for every ``div.item-form`` element, with the keys
		图片 (image URL), 名称 (name), 描述 (description), 单价 (unit price),
		数量 (quantity), 重量 (weight) and 总金额 (total amount).
		The image URL is None when the item has no ``<img src>``.
	'''
	doc = pq(content)
	for item in doc("div.item-form").items():
		# attr() returns None when there is no <img> or it has no src;
		# concatenating "https:" to None would raise TypeError.
		src = item.find("img").attr("src")
		if src is None:
			image = None
		elif src.startswith(("http://", "https://")):
			# Already absolute: prefixing a scheme would corrupt the URL.
			image = src
		else:
			# Scheme-less src (e.g. "//host/path"): add the scheme as before.
			image = "https:" + src
		yield {
			'图片': image,
			'名称': item.find("div.p-name a").text(),
			'描述': item.find("div.props-txt").text(),
			'单价': item.find("p.plus-switch strong").text(),
			'数量': item.find("div.quantity-form input").attr("value"),
			'重量': item.find("span.weight").text(),
			'总金额': item.find("div.p-sum strong").text(),
		}

def writeFile(content):
	'''Append ``content`` to ./jd.txt as a single JSON line.

	The file is opened in append mode with UTF-8 encoding, and non-ASCII
	characters are written as-is rather than as \\uXXXX escapes.
	'''
	with open('./jd.txt', mode='a', encoding="utf-8") as out:
		record = json.dumps(content, ensure_ascii=False)
		out.write(record + "\n")


#主程序函数
def main():
	'''Fetch the JD cart page and append every parsed item to jd.txt.

	Does nothing when the page could not be fetched (getPage returned a
	falsy value).
	'''
	jd_cart_url = "https://cart.jd.com/cart.action"
	page = getPage(jd_cart_url)
	if not page:
		return
	# One JSON line is written per cart item found on the page.
	for record in parsePage(page):
		writeFile(record)


# Script entry point: run main() only when executed directly, not on import.
if __name__ =='__main__':
	main()